Exploratory Data Analysis

Code
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import MarkerCluster
from matplotlib.colors import LinearSegmentedColormap
import warnings

# Default size applied to every matplotlib figure created afterwards
plt.rcParams.update({'figure.figsize': (10, 6)})

# Hide FutureWarning messages for the rest of the session
warnings.filterwarnings(action='ignore', category=FutureWarning)

# Use seaborn's white-grid look for all plots
sns.set_theme(style="whitegrid")

# Read the cleaned incidents table into the shared `df` frame
df = pd.read_csv("data/security_incidents_cleaned.csv")

# Announce and show the top rows (the bare expression renders as cell output)
print("Preview of the dataset:")
df.head()
Preview of the dataset:
year country un ingo icrc nrcs_and_ifrc nngo other nationals_killed nationals_wounded ... location latitude longitude motive actor_type actor_name details verified source high_impact
0 1997 Cambodia 0 0 1 0 0 0 1 0 ... Unknown 14.070929 103.099916 Unknown Unknown Unknown 1 ICRC national staff killed while working in ... Archived Archived False
1 1997 Rwanda 0 4 0 0 0 0 0 0 ... Office/compound -1.499840 29.634970 Unknown Unknown Unknown 3 INGO international (Spanish) staff killed, 1... Archived Archived False
2 1997 Tajikistan 4 0 2 0 0 0 0 0 ... Unknown 38.628173 70.815654 NaN Unknown Unknown 3 UN national staff, 1 UN international (Niger... Archived Archived False
3 1997 Somalia 0 1 0 0 0 0 0 0 ... Unknown -0.358216 42.545087 Political Non-state armed group: Regional Al-Itihaad al-Islamiya 1 INGO international staff killed by Al ittiha... Archived Archived False
4 1997 Rwanda 1 0 0 0 0 0 1 0 ... Unknown -1.950851 30.061508 Political Unknown Unknown 1 UN national staff shot and killed in Kigali ... Archived Archived False

5 rows × 35 columns

Code
# Import necessary libraries for interactive plotting
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display

# Count incidents per (year, country); each distinct year becomes one animation frame
incidents_by_year_country = df.groupby(['year', 'country']).size().reset_index(name='incidents')

# Total incidents per year, used for the per-frame annotation text
year_totals = incidents_by_year_country.groupby('year')['incidents'].sum().reset_index()
year_totals = year_totals.sort_values('year')

# Create animated choropleth map showing incidents by country over time.
# range_color is fixed to the global maximum so colors are comparable across frames.
fig = px.choropleth(incidents_by_year_country,
                   locations='country',
                   locationmode='country names',
                   color='incidents',
                   animation_frame='year',
                   color_continuous_scale='Viridis',
                   range_color=[0, incidents_by_year_country['incidents'].max()],
                   title='Security Incidents by Country Over Time',
                   labels={'incidents': 'Number of Incidents'},
                   height=600)

# Improve layout
fig.update_layout(
    coloraxis_colorbar=dict(
        title='Number of Incidents'
    ),
    geo=dict(
        showframe=False,
        showcoastlines=True,
        projection_type='natural earth'
    )
)

# Slow down the play button: 1000 ms per frame, 500 ms transition
fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 1000
fig.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 500

# Add annotation for total incidents per year.
# Frames are matched by name (the stringified animation_frame value) rather
# than by the positional offset `year - min(year)`: a positional index silently
# mislabels frames, or raises IndexError, as soon as one year has no incidents.
totals_by_frame_name = {
    str(year): total
    for year, total in zip(year_totals['year'], year_totals['incidents'])
}
for frame in fig.frames:
    frame_name = str(frame.name)
    if frame_name not in totals_by_frame_name:
        # No total known for this frame; leave it without an annotation
        continue
    year_total = totals_by_frame_name[frame_name]
    frame.layout.annotations = [
        dict(
            x=0.5,
            y=1.05,
            xref='paper',
            yref='paper',
            text=f'Total Incidents in {frame_name}: {year_total}',
            showarrow=False,
            font=dict(
                size=16
            )
        )
    ]

# For Quarto output, save as HTML
fig.write_html("images/interactive_incidents_over_time.html")

# Display for notebook viewing
fig.show()

# Create an alternative interactive bar chart with year slider
year_incidents = df.groupby('year').size().reset_index(name='incidents')
year_incidents['year'] = year_incidents['year'].astype(str)  # Convert year to string for better display

fig2 = px.bar(year_incidents,
              x='year',
              y='incidents',
              title='Interactive Security Incidents by Year',
              labels={'incidents': 'Number of Incidents', 'year': 'Year'},
              height=500)

# Add range slider
fig2.update_layout(
    xaxis=dict(
        rangeslider=dict(visible=True),
        type='category'  # Use category type for discrete years
    ),
    bargap=0.1,
    template='plotly_white'
)

# Save the interactive bar chart
fig2.write_html("images/interactive_yearly_incidents_barchart.html")

# Show the bar chart
fig2.show()

print("Interactive visualizations saved as HTML files in the images directory.")
Interactive visualizations saved as HTML files in the images directory.
Code
# Filter data for Palestine, Ukraine, and South Sudan.
# The dataset labels Palestine as 'Occupied Palestinian Territories', so that
# exact value is the key used for every per-country lookup below
# (colors, markers, key events).
focus_countries = ['Occupied Palestinian Territories', 'Ukraine', 'South Sudan']
focus_df = df[df['country'].isin(focus_countries)]

# Check if we have data for these countries
countries_found = focus_df['country'].unique()
print(f"Found data for: {', '.join(countries_found)}")
print(f"Total incidents: {len(focus_df)}")

# Group data by year and country
incidents_by_year_country = focus_df.groupby(['year', 'country']).size().reset_index(name='incidents')

# Get available years range
min_year = focus_df['year'].min()
max_year = focus_df['year'].max()

# Create line plot with markers
plt.figure(figsize=(14, 7))

# Define distinct colors and markers for each country, keyed by the dataset's
# country value. The short 'Palestine' alias is kept so existing lookups by
# that name keep working.
colors = {
    'Occupied Palestinian Territories': 'green',
    'Palestine': 'green',
    'Ukraine': 'blue',
    'South Sudan': 'red',
}
markers = {
    'Occupied Palestinian Territories': 'o',
    'Palestine': 'o',
    'Ukraine': 's',
    'South Sudan': '^',
}

for country in countries_found:
    country_data = incidents_by_year_country[incidents_by_year_country['country'] == country]
    plt.plot(country_data['year'], country_data['incidents'],
             marker=markers.get(country, 'o'),
             linewidth=2.5,
             color=colors.get(country),
             label=country)

plt.title('Security Incidents in Palestine, Ukraine, and South Sudan', fontsize=14)
plt.xlabel('Year', fontsize=12)
plt.ylabel('Number of Incidents', fontsize=12)
plt.grid(True, alpha=0.3)
plt.legend(fontsize=12)

# Set x-axis to show all years
plt.xticks(range(min_year, max_year+1), rotation=45)

# Key events to annotate, keyed by the dataset's country value
key_events = {
    'Ukraine': [
        (2014, 'Crimea annexation'),
        (2022, 'Full-scale invasion')
    ],
    'South Sudan': [
        (2011, 'Independence'),
        (2013, 'Civil war begins'),
        (2018, 'Peace agreement')
    ],
    'Occupied Palestinian Territories': [
        (2008, '2008-09 Gaza War'),
        (2014, '2014 Gaza War'),
        (2021, 'May 2021 conflict'),
        (2023, 'Oct 2023 conflict')
    ]
}

# Vertical offset for annotation text: 10% of the plotted incident range.
# It does not depend on the country or event, so compute it once.
y_range = incidents_by_year_country['incidents'].max() - incidents_by_year_country['incidents'].min()
offset = y_range * 0.1

# Add annotations if the years are in our data
for country in countries_found:
    for year, event in key_events.get(country, []):
        # Check if this year exists in our data for this country
        year_data = incidents_by_year_country[(incidents_by_year_country['country'] == country) &
                                             (incidents_by_year_country['year'] == year)]
        if year_data.empty:
            continue
        incidents_value = year_data['incidents'].values[0]

        plt.annotate(f'{event}',
                    xy=(year, incidents_value),
                    xytext=(year, incidents_value + offset),
                    arrowprops=dict(facecolor=colors.get(country), shrink=0.05, width=1, headwidth=5),
                    fontsize=9,
                    color=colors.get(country))

plt.tight_layout()
plt.savefig('images/three_countries_incidents_comparison.png', dpi=300)
plt.show()

# Add a line chart showing casualties for these countries over time
if 'total_affected' in focus_df.columns:
    casualties_by_year = focus_df.groupby(['year', 'country'])['total_affected'].sum().reset_index()

    plt.figure(figsize=(14, 7))

    for country in countries_found:
        country_data = casualties_by_year[casualties_by_year['country'] == country]
        plt.plot(country_data['year'], country_data['total_affected'],
                marker=markers.get(country, 'o'),
                linewidth=2.5,
                color=colors.get(country),
                label=country)

    plt.title('Casualties from Security Incidents in Palestine, Ukraine, and South Sudan', fontsize=14)
    plt.xlabel('Year', fontsize=12)
    plt.ylabel('Total Casualties', fontsize=12)
    plt.grid(True, alpha=0.3)
    plt.legend(fontsize=12)
    plt.xticks(range(min_year, max_year+1), rotation=45)
    plt.tight_layout()
    plt.savefig('images/three_countries_casualties_comparison.png', dpi=300)
    plt.show()

# Create a stacked bar chart to show the composition of casualties.
# Skipped when no focus country has data, because plt.subplots needs >= 1 row.
casualty_columns = ['total_killed', 'total_wounded', 'total_kidnapped']
if len(countries_found) > 0 and all(col in focus_df.columns for col in casualty_columns):
    # Group by year and country, summing up different types of casualties
    casualty_types = focus_df.groupby(['year', 'country']).agg({
        'total_killed': 'sum',
        'total_wounded': 'sum',
        'total_kidnapped': 'sum'
    }).reset_index()

    # Bar plots place bars at positions 0..n-1, so with sharex=True every
    # subplot must contain the same years; otherwise the shared tick labels
    # of the bottom subplot mislabel the bars above it. Each country is
    # therefore reindexed to the full year range, with 0 for missing years.
    all_years = pd.RangeIndex(min_year, max_year + 1, name='year')

    # Create subplots, one for each country
    fig, axes = plt.subplots(len(countries_found), 1, figsize=(14, 4*len(countries_found)), sharex=True)
    # plt.subplots returns a bare Axes (not an array) when there is one row
    axes_list = axes if len(countries_found) > 1 else [axes]

    for i, country in enumerate(countries_found):
        ax = axes_list[i]
        country_data = casualty_types[casualty_types['country'] == country]

        # Year-indexed table of the three casualty types on the common year axis
        country_data_stacked = (
            country_data.set_index('year')[casualty_columns]
            .reindex(all_years, fill_value=0)
        )

        # Plot stacked bar
        country_data_stacked.plot(
            kind='bar', stacked=True, ax=ax,
            color=['darkred', 'orange', 'purple']
        )

        # Set title and labels; only the bottom subplot gets the x label
        ax.set_title(f'{country}: Casualties by Type', fontsize=12)
        ax.set_ylabel('Number of Casualties', fontsize=10)
        if i == len(countries_found) - 1:
            ax.set_xlabel('Year', fontsize=10)

    plt.tight_layout()
    plt.savefig('images/three_countries_casualty_types.png', dpi=300)
    plt.show()
Found data for: South Sudan, Occupied Palestinian Territories, Ukraine
Total incidents: 785

Code
# Tally incidents per year, ordered chronologically by the year index
incidents_by_year = df['year'].value_counts().sort_index()

# Dataset-wide figures reported after the chart
total_incidents = df.shape[0]
min_year = df['year'].min()
max_year = df['year'].max()

# One line of text output per year
print("Number of incidents by year:")
for year, count in incidents_by_year.items():
    print(f"{year}: {count} incidents")

# Bar chart of the yearly trend
bar_years = incidents_by_year.index
bar_counts = incidents_by_year.values
plt.figure(figsize=(12, 6))
sns.barplot(x=bar_years, y=bar_counts, palette='viridis')
plt.title('Number of Security Incidents by Year')
plt.xlabel('Year')
plt.ylabel('Number of Incidents')
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

# Overall count, then the span of years covered
print(f"\nTotal number of incidents in the dataset: {total_incidents}")
print(f"Dataset covers incidents from {min_year} to {max_year} ({max_year - min_year + 1} years)")
Number of incidents by year:
1997: 34 incidents
1998: 26 incidents
1999: 32 incidents
2000: 42 incidents
2001: 29 incidents
2002: 46 incidents
2003: 63 incidents
2004: 64 incidents
2005: 74 incidents
2006: 106 incidents
2007: 124 incidents
2008: 164 incidents
2009: 156 incidents
2010: 132 incidents
2011: 152 incidents
2012: 162 incidents
2013: 253 incidents
2014: 194 incidents
2015: 150 incidents
2016: 164 incidents
2017: 159 incidents
2018: 229 incidents
2019: 276 incidents
2020: 283 incidents
2021: 272 incidents
2022: 247 incidents
2023: 281 incidents
2024: 353 incidents
2025: 47 incidents


Total number of incidents in the dataset: 4314
Dataset covers incidents from 1997 to 2025 (29 years)
Code
# Create an interactive map of all incidents
def create_incidents_map(data):
    """Build a folium map with one clustered circle marker per located incident.

    Args:
        data: DataFrame of incidents. The 'latitude', 'longitude', 'country'
            and 'year' columns are read for every plotted row. The
            'total_affected' and 'means_of_attack' columns are optional;
            when absent the popup shows 'Unknown' for them.

    Returns:
        A folium.Map centered on the mean coordinates, containing a
        MarkerCluster with a CircleMarker for every row that has both a
        latitude and a longitude. Markers are colored by 'total_affected':
        blue (missing or 0), green (<= 5), orange (<= 20), red (> 20).
    """
    # Calculate center coordinates for the map (average of all points).
    # mean() is NaN when there are no coordinates at all; fall back to (0, 0)
    # so an empty map is returned instead of passing NaN to folium.Map.
    center_lat = data['latitude'].mean()
    center_lon = data['longitude'].mean()
    if pd.isna(center_lat) or pd.isna(center_lon):
        center_lat, center_lon = 0.0, 0.0

    # Create a map centered on the average coordinates
    incidents_map = folium.Map(location=[center_lat, center_lon], zoom_start=2)

    # Add a marker cluster for better performance with many points
    marker_cluster = MarkerCluster().add_to(incidents_map)

    # Only rows with both coordinates can be placed on the map
    valid_coords = data[data['latitude'].notna() & data['longitude'].notna()]

    # Map the number of affected people to a marker color
    def get_color(affected):
        if pd.isna(affected) or affected == 0:
            return 'blue'
        elif affected <= 5:
            return 'green'
        elif affected <= 20:
            return 'orange'
        else:
            return 'red'

    for _, row in valid_coords.iterrows():
        # Series.get returns None for a missing column instead of raising
        # KeyError, so both optional columns are handled the same way.
        affected = row.get('total_affected')
        attack = row.get('means_of_attack')
        affected_label = 'Unknown' if affected is None else affected
        attack_label = attack if pd.notna(attack) else 'Unknown'

        # Create popup text with incident details
        popup_text = f"""
        <b>Country:</b> {row['country']}<br>
        <b>Year:</b> {row['year']}<br>
        <b>Total Affected:</b> {affected_label}<br>
        <b>Attack Type:</b> {attack_label}<br>
        """

        # Outline and fill share one color, so compute it once per row
        marker_color = get_color(affected)

        # Add circle marker
        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=5,
            popup=folium.Popup(popup_text, max_width=300),
            fill=True,
            fill_opacity=0.7,
            color=marker_color,
            fill_color=marker_color
        ).add_to(marker_cluster)

    return incidents_map

# Destination of the exported HTML map
map_filename = "images/global_security_incidents_map.html"

# Build the map from the full dataset and write it to disk
global_incidents_map = create_incidents_map(df)
global_incidents_map.save(map_filename)

# Bare expression: shown as the cell result when run in Jupyter
global_incidents_map
Make this Notebook Trusted to load map: File -> Trust Notebook
Code
# Keep only incidents whose year lies in 2015-2025 (both ends inclusive)
incident_year = df['year']
recent_df = df[(2015 <= incident_year) & (incident_year <= 2025)]

print(f"Number of incidents from 2015-2025: {len(recent_df)}")

# Create an interactive map of recent incidents (2015-2025)
def create_recent_incidents_map(data):
    """Build the clustered folium incident map for an already-filtered subset.

    This used to be a line-for-line copy of create_incidents_map; it now
    delegates to that function so the two maps cannot drift apart. The
    function itself does not filter by year: the caller passes the rows
    it wants plotted.

    Args:
        data: DataFrame of incidents to plot (same columns as expected by
            create_incidents_map).

    Returns:
        The folium.Map produced by create_incidents_map(data).
    """
    return create_incidents_map(data)

# Destination of the exported 2015-2025 HTML map
map_filename = "images/recent_security_incidents_map_2015_2025.html"

# Build the map from the recent subset and write it to disk
recent_incidents_map = create_recent_incidents_map(recent_df)
recent_incidents_map.save(map_filename)

print(f"Interactive map of recent incidents (2015-2025) saved as {map_filename}")

# Bare expression: shown as the cell result when run in Jupyter
recent_incidents_map
Number of incidents from 2015-2025: 2461
Interactive map of recent incidents (2015-2025) saved as images/recent_security_incidents_map_2015_2025.html
Make this Notebook Trusted to load map: File -> Trust Notebook